/*
This do file takes one simulated dataset (Scenario 1 Sample Size 500, medium survival, large heterogeneity)
and shows the analysis approach adopted in the TSD.

This do file fits flexible parametric survival models
*/
// Work from the example folder. The global macro DRIVE must be defined before
// this do file is run. Forward slashes are used for every directory separator:
// Stata accepts them on all platforms, whereas a backslash only works on
// Windows and can also interfere with macro expansion.
cd "${DRIVE}/GitSoftware/TSD_simulation/Files_for_Appendix/Example"

// stexpect3 calculates marginal expected survival and hazard rates
// (add its folder, relative to the example folder, to the front of the ado-path)
adopath ++ "../../stexpect3"

// Load the simulated dataset, discarding anything currently in memory
use sim1, clear

// Declare the survival-time data: t is the time variable and
// d equal to 1 or 2 is treated as a failure (all-cause)
stset t,f(d==1,2)

// Kaplan-Meier estimate: draw the curve, then keep it as S_km so that it can be
// overlaid on the model-based curves in later plots
sts graph, xtitle("Time from randomisation") ytitle("All-cause survival")
sts generate S_km = s


// Expected survival and hazard functions (stexpect3).
// These are evaluated at 1000 points, so the dataset is first extended to
// 1000 observations.
set obs 1000

// Inputs passed to stexpect3: a diagnosis date drawn at random within 2009 and
// a constant sex indicator
generate datediag = mdy(1, 1, 2009) + runiform(1, 365)
generate sex = 1

// NOTE(review): exphaz (rescaled below) and expsurv / t_exp (used in later
// plots) are presumably created by stexpect3 - confirm against its help file.
stexpect3 using life_tables_1971_2009_england, ///
	agediag(age)          ///
	datediag(datediag)    ///
	pmother(sex)          ///
	pmage(age)            ///
	pmyear(calendar_year) ///
	every(0.2)            ///
	maxt(80)              ///
	pmmaxyear(2009)       ///
	pmmaxage(99)

// Express the expected hazard per 1000 person-years
replace exphaz = 1000*exphaz

// Flexible parametric models (stpm2, scale(hazard))
// Use 3, 4 and 5 df as an example. For each df:
//   - fit the model and store the estimates as stpm2_<df>
//   - predict survival (S1_df<df>) and hazard (h1_df<df>) over tt, a grid from 0 to 3
//   - keep the AIC reported by -estat ic- in the local macro AIC_<df>
range tt 0 3
generate t3 = 3 in 1
foreach df of numlist 3/5 {
	stpm2, df(`df') scale(hazard)
	estimates store stpm2_`df'

	predict S1_df`df', survival timevar(tt)
	predict h1_df`df', hazard timevar(tt)
	// hazard expressed per 1000 person-years
	replace h1_df`df' = 1000*h1_df`df'

	// the AIC is the 5th column of the r(S) matrix left behind by -estat ic-
	estat ic
	local AIC_`df' : display %4.3f el(r(S), 1, 5)
}

// Plot survival function
// Legend keys follow the plot order below: 1 = Kaplan-Meier, 2-4 = the models
// with 3, 4 and 5 df, 5 = expected survival. The AIC shown for each model comes
// from the locals AIC_3, AIC_4 and AIC_5 set when the models were fitted, so
// each label must use the same df as the curve it describes.
twoway (line S_km _t, sort connect(stairstep) ) ///
		(line S1_df3 tt, sort) ///
		(line S1_df4 tt, sort) ///
		(line S1_df5 tt, sort) ///
		(line expsurv t_exp if t_exp<=3, lpattern(dot) lcolor(black)) ///
		,legend(order(1 "K-M" 2 "3 df (AIC=`AIC_3')" 3 "4 df (AIC=`AIC_4')" 4 "5 df (AIC=`AIC_5')" 5 "Expected") pos(7) cols(2) ring(0) size(small)) ///
		ylabel(0(0.2)1) ///
		ytitle("Survival function") ///
		title("Flexible Parametric Survival Models") ///
		name(surv1, replace)
    
// Plot hazard functions
// High initial hazard removed (model hazards are only drawn for tt>0.02).
// Plot 1 is a white dummy line for K-M and is left out of the legend; keys 2-4
// are the models with 3, 4 and 5 df, labelled with the matching AIC locals
// (AIC_3, AIC_4, AIC_5), and key 5 is the expected hazard.
twoway (line S_km _t, sort connect(stairstep) lcolor(white)) /// Add a white dummy line for K-M
		(line h1_df3 tt if tt>0.02, sort) ///
		(line h1_df4 tt if tt>0.02, sort) ///
		(line h1_df5 tt if tt>0.02, sort) ///
		(line exphaz t_exp if t_exp<=3, lpattern(dot) lcolor(black)) ///
		,legend(order(2 "3 df (AIC=`AIC_3')" 3 "4 df (AIC=`AIC_4')" 4 "5 df (AIC=`AIC_5')" 5 "Expected") pos(7) cols(2) ring(0) size(small)) ///
		ylabel(100 200 500 1000,angle(h)) ///
		yscale(log) ///
		ytitle("Mortality rate (per 1000 py)") ///
		title("Flexible Parametric Survival Models") ///
		name(haz1, replace)
    
// Restricted mean survival time (RMST) up to three years.
// Each stored model is restored in turn and its RMST predicted into rmst_df<df>;
// the values in the first observation are then listed.
foreach df of numlist 3/5 {
	estimates restore stpm2_`df'
	predict rmst_df`df', rmst tmax(3)
}
list rmst_df* in 1, noobs
    

// Extrapolation: predict survival and hazard from each stored model over a
// time grid running from 0 to 80 years (tt80). These predictions feed the
// extrapolated plots; mean survival is calculated further below.
range tt80 0 80
foreach df of numlist 3/5 {
	estimates restore stpm2_`df'

	predict S2_df`df', survival timevar(tt80)
	predict h2_df`df', hazard timevar(tt80)
	// hazard expressed per 1000 person-years
	replace h2_df`df' = 1000*h2_df`df'
}

// Extrapolated survival functions (0 to 80 years) against Kaplan-Meier and
// expected survival. The Kaplan-Meier curve (plot 1) is drawn but not given a
// legend entry; keys 2-4 are the models with 3, 4 and 5 df, labelled with the
// matching AIC locals (AIC_3, AIC_4, AIC_5), and key 5 is expected survival.
// The title names the flexible parametric models fitted in this do file.
twoway (line S_km _t, sort connect(stairstep)) ///
		(line S2_df3 tt80, sort) ///
		(line S2_df4 tt80, sort) ///
		(line S2_df5 tt80, sort) ///
		(line expsurv t_exp if t_exp<=80, lpattern(dot) lcolor(black)) ///
		,legend(order(2 "3 df (AIC=`AIC_3')" 3 "4 df (AIC=`AIC_4')" 4 "5 df (AIC=`AIC_5')" 5 "Expected") pos(7) cols(2) ring(0) size(small)) ///
		title("Flexible Parametric Survival Models: Extrapolated Survival Functions") ///
		ytitle("Survival function") ///
		name(surv2, replace)
				
        
// Extrapolated hazard functions beyond 3 years.
// Only plot expected hazard when expected survival is >0.01.
// Plot 1 is a white dummy line for K-M and is left out of the legend; keys 2-4
// are the models with 3, 4 and 5 df, labelled with the matching AIC locals
// (AIC_3, AIC_4, AIC_5), and key 5 is the expected hazard.
// NOTE(review): this graph has no title. In the earlier version a
// title("Flexible Parametric Survival Models") option sat after a /// on the
// legend line, which made it a comment; confirm whether a title is wanted.
twoway 	(line S_km _t, sort connect(stairstep) color(white)) /// Add a white dummy line for K-M
		(line h2_df3 tt80 if tt80>3, sort) ///
		(line h2_df4 tt80 if tt80>3, sort) ///
		(line h2_df5 tt80 if tt80>3, sort) ///
		(line exphaz t_exp if t_exp<=80 & t_exp>=3 & expsurv>0.01, lpattern(dot) lcolor(black)) ///
		,legend(order(2 "3 df (AIC=`AIC_3')" 3 "4 df (AIC=`AIC_4')" 4 "5 df (AIC=`AIC_5')" 5 "Expected") pos(1) cols(1) ring(0) size(small)) ///
		ytitle("Mortality rate (per 1000 person years)") ///
		name(haz2, replace)

// Restricted mean survival (evaluated at 80 years).
// S(t) has not reached zero by 80 years, which is obviously a daft extrapolation.
// Each stored model is restored in turn and its restricted mean predicted into
// ms_df<df>; the values in the first observation are then listed.
foreach df of numlist 3/5 {
	estimates restore stpm2_`df'
	predict ms_df`df', rmst tmax(80)
}
list ms_df* in 1, noobs
